\section{Derivative of Matrix Determinant}

We derive the formula for the linear form of the matrix derivative, namely $\frac{\partial det(\textbf{X})}{\partial \textbf{X}}$. Before moving on, we first introduce a theorem that will be used later. It is called \textit{Sylvester's determinant theorem} and states that

\begin{equation}\nonumber
det(\textbf{I}_{m} + \textbf{AB}) = det(\textbf{I}_{n} + \textbf{BA})
\end{equation}

Here $\textbf{I}_m$ and $\textbf{I}_n$ are the $m \times m$ and $n \times n$ identity matrices, respectively, while \textbf{A} and \textbf{B} are $m \times n$ and $n \times m$ matrices, respectively. This theorem has some interesting consequences. One of them, which is useful in our case, is the following

\begin{equation}\nonumber
det(\textbf{X}+\textbf{ab}^T) = det(\textbf{X})(1+\textbf{b}^T\textbf{X}^{-1}\textbf{a})
\end{equation}

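where \textbf{a} and \textbf{b} are both column vectors and \textbf{X} is an invertible square matrix. It follows by writing $\textbf{X}+\textbf{ab}^T = \textbf{X}(\textbf{I}+\textbf{X}^{-1}\textbf{ab}^T)$ and applying Sylvester's determinant theorem with $\textbf{A} = \textbf{X}^{-1}\textbf{a}$ and $\textbf{B} = \textbf{b}^T$.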

Now let us derive the formula for the derivative of the matrix determinant, starting from the definition of the derivative.

\begin{equation}\nonumber
\begin{split}
\frac{\partial det(\textbf{X})}{\partial \textbf{X}} &= \begin{pmatrix} 
\frac{\partial det(\textbf{X})}{\partial \textbf{X}_{11}} & \frac{\partial det(\textbf{X})}{\partial \textbf{X}_{12}} & \cdots & \frac{\partial det(\textbf{X})}{\partial \textbf{X}_{1n}} \\
\frac{\partial det(\textbf{X})}{\partial \textbf{X}_{21}} & \frac{\partial det(\textbf{X})}{\partial \textbf{X}_{22}} & \cdots & \frac{\partial det(\textbf{X})}{\partial \textbf{X}_{2n}} \\
\vdots & \vdots & \ddots & \vdots\\
\frac{\partial det(\textbf{X})}{\partial \textbf{X}_{n1}} & \frac{\partial det(\textbf{X})}{\partial \textbf{X}_{n2}} & \cdots & \frac{\partial det(\textbf{X})}{\partial \textbf{X}_{nn}} 
\end{pmatrix}\\
\frac{\partial det(\textbf{X})}{\partial \textbf{X}_{ij}} = \lim_{\Delta x \rightarrow 0} &\frac{det(\textbf{X}+\boldsymbol{\Gamma}) - det(\textbf{X})}{\Delta x}, \boldsymbol{\Gamma} = \begin{pmatrix} 
0 & 0 & \cdots & \cdots & \cdots & 0 \\
0 & 0 & \cdots & \cdots & \cdots & 0 \\
\vdots & \vdots & \ddots & \boldsymbol{\Gamma}_{ij} & \ddots & \vdots\\
0 & 0 & \cdots & \cdots & \cdots & 0 
\end{pmatrix}, \boldsymbol{\Gamma}_{ij} = \Delta x\\
\boldsymbol{\Gamma} = \textbf{c} \otimes \textbf{r} = \textbf{cr}^T, &\textbf{c} = \begin{pmatrix} 0 \\ 0 \\ \vdots \\ \textbf{c}_i \\ \vdots \\ 0 \end{pmatrix}, \textbf{r} = \begin{pmatrix} 0 \\ 0 \\ \vdots \\ \textbf{r}_j \\ \vdots \\ 0 \end{pmatrix}, \textbf{c}_i = 1, \textbf{r}_j = \Delta x\\
\Rightarrow \frac{\partial det(\textbf{X})}{\partial \textbf{X}_{ij}} &= \lim_{\Delta x \rightarrow 0} \frac{det(\textbf{X})(1+\textbf{r}^T\textbf{X}^{-1}\textbf{c}) - det(\textbf{X})}{\Delta x}\\ 
&= det(\textbf{X})\lim_{\Delta x \rightarrow 0} \frac{\textbf{r}^T\textbf{X}^{-1}\textbf{c}}{\Delta x} = det(\textbf{X})\textbf{X}^{-1}_{ji} = det(\textbf{X})(\textbf{X}^{-1})^{T}_{ij}\\
\Rightarrow \frac{\partial det(\textbf{X})}{\partial \textbf{X}} &= det(\textbf{X})(\textbf{X}^{-1})^{T}
\end{split}
\end{equation}

